import random
import csv
from random import choice

def random_list(start, stop, length):
    """Return a list of ``length`` random integers from the closed range [start, stop].

    Args:
        start: One bound of the range; anything accepted by ``int()``.
        stop: The other bound of the range; anything accepted by ``int()``.
            The bounds may be given in either order.
        length: Number of values to generate; anything accepted by ``int()``.
            Zero or negative values produce an empty list.

    Returns:
        A list of ints, each drawn with ``random.randint`` (both bounds
        inclusive).

    Raises:
        ValueError / TypeError: If an argument cannot be converted by ``int()``.
    """
    # Convert before ordering: comparing the raw arguments would order numeric
    # strings lexicographically ("10" < "9") and yield an inverted range.
    low, high = sorted((int(start), int(stop)))
    # Clamp so that negative lengths (int or float) uniformly mean "no items".
    count = max(int(length), 0)
    return [random.randint(low, high) for _ in range(count)]

def load_tag_to_idx(filename):
    """Build a tag -> index mapping from a text file with one tag per line.

    Leading and trailing whitespace is stripped from every line. Each line is
    assigned the current size of the mapping as its index, so distinct tags
    are numbered 0, 1, 2, ... in file order. A repeated tag is re-assigned the
    mapping size at the time of its later occurrence, and a blank line is
    stored under the empty-string key.

    Args:
        filename: Path of the tag file to read.

    Returns:
        dict mapping each stripped line (str) to its integer index.

    Raises:
        OSError: If the file cannot be opened.
    """
    tag_to_idx = {}
    # NOTE(review): the file is read with the platform default encoding;
    # confirm whether it should be opened with an explicit encoding.
    # The context manager guarantees the handle is closed even if reading fails.
    with open(filename) as fo:
        for line in fo:
            tag = line.strip()
            tag_to_idx[tag] = len(tag_to_idx)
    return tag_to_idx

# Known labels; only rows whose 'label' value is one of these keys are
# candidates for sampling.
tag_to_idx = load_tag_to_idx('alldoc/corpora_train.label_to_idx')
# A candidate row is kept when a digit drawn from 0-9 is below 2 (2 in 10).
foo = list(range(10))
# Starts at 1 and is bumped after every written row; the scan stops once it
# exceeds 200.
testcount = 1

# (A disabled earlier variant picked rows of 'alldoc/corpora_train_1.csv' at
# line numbers pre-drawn with random_list(1, 79000, 200).)

# Passing newline='' keeps blank lines from appearing between written rows.
with open("alldoc/corpora_test_1.csv", "w", newline='', encoding='utf-8') as out_file:
    writer = csv.writer(out_file)
    # Write the column names first.
    column_names = [
        "txt_length", "ave_sentence_length", "ave_word_length", "richness_rate",
        "idiom_count", "emptyword_count", "comma_count", "period_count",
        "question_mark_count", "exclamation_mark", "colon_count",
        "semicolon_count", "punctuation_mark_count", "noun_count",
        "verb_count", "label",
    ]
    writer.writerow(column_names)

    with open('alldoc/corpora.csv', 'r', encoding='utf-8') as in_file:
        reader = csv.DictReader(in_file)
        for row in reader:
            # Skip rows whose label is not a known tag.
            if row['label'] not in tag_to_idx:
                continue
            if choice(foo) < 2:
                # Values are written in the source file's own column order.
                writer.writerow(row.values())
                testcount += 1
            if testcount > 200:
                break
